In [1]:
import pandas as pd
In [2]:
df = pd.read_csv("AB_NYC_2019.csv")
In [3]:
df.head(2)
Out[3]:
id | name | host_id | host_name | neighbourhood_group | neighbourhood | latitude | longitude | room_type | price | minimum_nights | number_of_reviews | last_review | reviews_per_month | calculated_host_listings_count | availability_365 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 2539 | Clean & quiet apt home by the park | 2787 | John | Brooklyn | Kensington | 40.64749 | -73.97237 | Private room | 149 | 1 | 9 | 19-10-2018 | 0.21 | 6 | 365 |
1 | 2595 | Skylit Midtown Castle | 2845 | Jennifer | Manhattan | Midtown | 40.75362 | -73.98377 | Entire home/apt | 225 | 1 | 45 | 21-05-2019 | 0.38 | 2 | 355 |
In [4]:
df.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 48906 entries, 0 to 48905 Data columns (total 16 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 id 48906 non-null int64 1 name 48890 non-null object 2 host_id 48906 non-null int64 3 host_name 48885 non-null object 4 neighbourhood_group 48906 non-null object 5 neighbourhood 48906 non-null object 6 latitude 48906 non-null float64 7 longitude 48906 non-null float64 8 room_type 48906 non-null object 9 price 48906 non-null int64 10 minimum_nights 48906 non-null int64 11 number_of_reviews 48906 non-null int64 12 last_review 38854 non-null object 13 reviews_per_month 38854 non-null float64 14 calculated_host_listings_count 48906 non-null int64 15 availability_365 48906 non-null int64 dtypes: float64(3), int64(7), object(6) memory usage: 6.0+ MB
In [5]:
df["id"] = df["id"].astype(str)
In [6]:
df["id"].dtype
Out[6]:
dtype('O')
In [7]:
df["host_id"] = df["host_id"].astype(str)
In [8]:
df["last_review"] = pd.to_datetime(df["last_review"])
C:\Users\Satyam\AppData\Local\Temp\ipykernel_12044\3465608367.py:1: UserWarning: Parsing dates in %d-%m-%Y format when dayfirst=False (the default) was specified. Pass `dayfirst=True` or specify a format to silence this warning. df["last_review"] = pd.to_datetime(df["last_review"])
In [9]:
df.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 48906 entries, 0 to 48905 Data columns (total 16 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 id 48906 non-null object 1 name 48890 non-null object 2 host_id 48906 non-null object 3 host_name 48885 non-null object 4 neighbourhood_group 48906 non-null object 5 neighbourhood 48906 non-null object 6 latitude 48906 non-null float64 7 longitude 48906 non-null float64 8 room_type 48906 non-null object 9 price 48906 non-null int64 10 minimum_nights 48906 non-null int64 11 number_of_reviews 48906 non-null int64 12 last_review 38854 non-null datetime64[ns] 13 reviews_per_month 38854 non-null float64 14 calculated_host_listings_count 48906 non-null int64 15 availability_365 48906 non-null int64 dtypes: datetime64[ns](1), float64(3), int64(5), object(7) memory usage: 6.0+ MB